install.packages("tidyverse")
## Installing package into '/home/rstudio-user/R/x86_64-pc-linux-gnu-library/4.0'
## (as 'lib' is unspecified)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2 ✓ purrr 0.3.4
## ✓ tibble 3.0.3 ✓ dplyr 1.0.2
## ✓ tidyr 1.1.2 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.5.0
## ── Conflicts ────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
# Download the global confirmed-case table and reshape it from one column per
# date into one row per location and date.
time_series_confirmed_long <-
  "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv" %>%
  url() %>%
  read_csv() %>%
  # Replace the slash-containing headers with syntactic column names.
  rename(
    Province_State = "Province/State",
    Country_Region = "Country/Region"
  ) %>%
  # Every column other than the four location columns is pivoted; its header
  # goes to Date and its value to Confirmed.
  pivot_longer(
    cols = -c(Province_State, Country_Region, Lat, Long),
    names_to = "Date",
    values_to = "Confirmed"
  )
## Parsed with column specification:
## cols(
## .default = col_double(),
## `Province/State` = col_character(),
## `Country/Region` = col_character()
## )
## See spec(...) for full column specifications.
# Let's get the time series data for deaths
# Same download-and-reshape steps as for the confirmed cases, with the pivoted
# values stored in Deaths.
time_series_deaths_long <-
  "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv" %>%
  url() %>%
  read_csv() %>%
  # Replace the slash-containing headers with syntactic column names.
  rename(
    Province_State = "Province/State",
    Country_Region = "Country/Region"
  ) %>%
  # Every column other than the four location columns is pivoted; its header
  # goes to Date and its value to Deaths.
  pivot_longer(
    cols = -c(Province_State, Country_Region, Lat, Long),
    names_to = "Date",
    values_to = "Deaths"
  )
## Parsed with column specification:
## cols(
## .default = col_double(),
## `Province/State` = col_character(),
## `Country/Region` = col_character()
## )
## See spec(...) for full column specifications.
# Create Keys
# Both tables get a "Province_State.Country_Region.Date" key column. The
# confirmed table keeps its source columns (remove = FALSE); the deaths table
# is reduced to the key plus Deaths so the join contributes one new column.
time_series_confirmed_long <- unite(
  time_series_confirmed_long,
  col = Key,
  Province_State, Country_Region, Date,
  sep = ".",
  remove = FALSE
)
time_series_deaths_long <- select(
  unite(
    time_series_deaths_long,
    col = Key,
    Province_State, Country_Region, Date,
    sep = "."
  ),
  Key, Deaths
)
# Join tables
# A full join keeps rows that appear in only one of the two tables; the key is
# dropped afterwards because it is only needed for matching.
time_series_long_joined <- time_series_confirmed_long %>%
  full_join(time_series_deaths_long, by = "Key") %>%
  select(-Key)
# Reformat the data
# Date still holds text taken from the pivoted column headers; parse it as
# month/day/year into a Date.
time_series_long_joined <- time_series_long_joined %>%
  mutate(Date = mdy(Date))
# Create Report table with counts
# Every column other than the location and date columns is stacked: its name
# goes to Report_Type and its value to Counts.
time_series_long_joined_counts <- pivot_longer(
  time_series_long_joined,
  cols = -c(Province_State, Country_Region, Lat, Long, Date),
  names_to = "Report_Type",
  values_to = "Counts"
)
Interactive Graph
install.packages("plotly")
## Installing package into '/home/rstudio-user/R/x86_64-pc-linux-gnu-library/4.0'
## (as 'lib' is unspecified)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Interactive (plotly) chart of US deaths over time.
# The US rows are selected before aggregating so only that country is summed,
# and summarise(across()) replaces the superseded summarise_at(). `.groups` is
# set explicitly so summarise() does not emit its regrouping message.
ggplotly(
  time_series_long_joined %>%
    filter(Country_Region == "US") %>%
    group_by(Country_Region, Date) %>%
    summarise(across(c(Confirmed, Deaths), sum), .groups = "drop") %>%
    ggplot(aes(x = Date, y = Deaths)) +
    geom_point() +
    geom_line() +
    ggtitle("US COVID-19 Deaths")
)
Animated Graph
install.packages("gganimate")
## Installing package into '/home/rstudio-user/R/x86_64-pc-linux-gnu-library/4.0'
## (as 'lib' is unspecified)
install.packages("transformr")
## Installing package into '/home/rstudio-user/R/x86_64-pc-linux-gnu-library/4.0'
## (as 'lib' is unspecified)
install.packages("gifski")
## Installing package into '/home/rstudio-user/R/x86_64-pc-linux-gnu-library/4.0'
## (as 'lib' is unspecified)
library(gganimate)
library(transformr)
library(gifski)
library(ggplot2)
# Make theme_bw() the default ggplot2 theme for plots created after this point.
theme_set(theme_bw())
# Per-country, per-date totals for the five countries shown in the animation.
# Rows are filtered before aggregating so only the selected countries are
# summed, and summarise(across()) replaces the superseded summarise_at().
# `.groups = "drop_last"` leaves the result grouped by Country_Region, which is
# the grouping summarise_at() left behind.
data_time <- time_series_long_joined %>%
  filter(
    Country_Region %in% c("China", "Korea, South", "Japan", "Italy", "US")
  ) %>%
  group_by(Country_Region, Date) %>%
  summarise(across(c(Confirmed, Deaths), sum), .groups = "drop_last")
# Animated chart of confirmed cases, one colour per country, revealed along
# the Date axis.
p <- ggplot(
  data = data_time,
  mapping = aes(x = Date, y = Confirmed, color = Country_Region)
) +
  geom_point() +
  geom_line() +
  ggtitle("Confirmed COVID-19 Cases") +
  # This second point layer puts every row in its own group; the gganimate
  # transition_reveal() examples use this to keep earlier points on screen.
  geom_point(aes(group = seq_along(Date))) +
  transition_reveal(Date)
# Some people needed to use this line instead
# animate(p,renderer = gifski_renderer(), end_pause = 15)
animate(plot = p, end_pause = 15)

Challenge 1
Challenge 2
Turn one of the exercises from Lab 5 into an interactive graph with plotly
# Country-level totals per date, plus the deaths-to-confirmed ratio.
time_series_long_joined_ratio <- time_series_long_joined %>%
  group_by(Country_Region, Date) %>%
  summarise(
    Total_Confirmed = sum(Confirmed),
    Total_Deaths = sum(Deaths)
  ) %>%
  # Ratio is NaN wherever both totals are zero (0 / 0).
  mutate(Ratio = Total_Deaths / Total_Confirmed)
## `summarise()` regrouping output by 'Country_Region' (override with `.groups` argument)
# Ten countries with the most deaths to date.
# Total_Deaths is a running (cumulative) total per date, so adding it up over
# all dates would count early deaths once for every later day. Each country's
# total to date is the maximum of its series, and that is what is ranked here.
time_series_long_total_deaths <- time_series_long_joined_ratio %>%
  group_by(Country_Region) %>%
  summarise(Deaths = max(Total_Deaths)) %>%
  # with_ties = FALSE caps the result at ten rows, as slice(1:10) did.
  slice_max(Deaths, n = 10, with_ties = FALSE)
## `summarise()` ungrouping output (override with `.groups` argument)
# Animated chart of total deaths for the top-10 countries.
# The countries come from time_series_long_total_deaths rather than a
# hand-typed list, so the plot follows the data when the ranking changes and
# always matches its title.
g <- time_series_long_joined_ratio %>%
  filter(
    Country_Region %in% time_series_long_total_deaths$Country_Region
  ) %>%
  ggplot(
    aes(
      x = Date,
      y = Total_Deaths,
      fill = Country_Region,
      color = Country_Region
    )
  ) +
  geom_point() +
  geom_line() +
  ggtitle("The Top 10 Countries by Total Deaths") +
  transition_reveal(Date)
animate(g, renderer = gifski_renderer(), end_pause = 15)

Challenge 3
Create an animated graph of your choosing using the time series data to display an aspect (e.g. states or countries) of the data that is important to you.
# Data Wrangling
# Download the US county-level confirmed-case table, total it per state, and
# reshape it to one row per state and date.

# download.file() fails when the target directory is missing, so create it
# first; showWarnings = FALSE keeps this quiet when it already exists.
dir.create("data", showWarnings = FALSE)
download.file(
  url = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_US.csv",
  destfile = "data/time_series_covid19_confirmed_US.csv"
)
time_series_covid19_confirmed_US <- read.csv(
  "data/time_series_covid19_confirmed_US.csv"
)

# Drop the identifier and location columns, then sum every remaining column
# within each state. across() replaces the deprecated summarise_each()/funs().
time_series_covid19_confirmed_US_totals <- time_series_covid19_confirmed_US %>%
  select(
    -c(
      UID, iso2, iso3, code3, FIPS, Lat, Long_, Combined_Key, Admin2,
      Country_Region
    )
  ) %>%
  group_by(Province_State) %>%
  summarise(across(everything(), sum), .groups = "drop")

# Strip the leading "X" from the column names that start with it.
# rename_with() replaces the deprecated rename_at()/funs().
time_series_covid19_confirmed_US_totals <-
  time_series_covid19_confirmed_US_totals %>%
  rename_with(~ str_remove(.x, "^X"), starts_with("X"))

# One row per state and date; the remaining headers are parsed as
# month.day.two-digit-year.
confirmed_US_totals_long <- time_series_covid19_confirmed_US_totals %>%
  pivot_longer(-Province_State, names_to = "Date", values_to = "Confirmed") %>%
  mutate(Date = as.Date(Date, format = "%m.%d.%y"))
# Ten states with the most confirmed cases to date.
# Confirmed is a running (cumulative) total per date, so adding it up over all
# dates would count early cases once for every later day. Each state's total
# to date is the maximum of its series, and that is what is ranked here.
ten_highest_confirmed_df <- confirmed_US_totals_long %>%
  group_by(Province_State) %>%
  summarise(Confirmed_Total = max(Confirmed)) %>%
  # with_ties = FALSE caps the result at ten rows, as head(10) did.
  slice_max(Confirmed_Total, n = 10, with_ties = FALSE)
## `summarise()` ungrouping output (override with `.groups` argument)
# Graph!
# Animated, faceted chart of confirmed cases for the states listed in
# ten_highest_confirmed_df, one panel per state.
# NOTE(review): the plot object is named `c`, the same name as base::c();
# consider renaming it once any other uses of the variable are confirmed.
c <- confirmed_US_totals_long %>%
  # Keep only rows whose state appears in the top-ten table.
  semi_join(ten_highest_confirmed_df, by = "Province_State") %>%
  ggplot(mapping = aes(x = Date, y = Confirmed)) +
  geom_point() +
  geom_line() +
  ggtitle("Top 10 US States by Total Confirmed Cases") +
  facet_wrap(~Province_State) +
  transition_reveal(Date)
animate(plot = c, end_pause = 15)
